library(dplyr)
# For Model fitting
library(lme4)
library(nlme)
library(purrr)
# For diagnostics
library(performance)
#library(see)
# For adding new columns
library(tibble)
library(MuMIn)
# setwd for Rmarkdowm
knitr::opts_knit$set(root.dir = "~/Documents/projects/nutrients_and_water_effects_2022/stats/")
# Load data
source("./scripts/code_join_data_full_dataset.R")
# Load functions
## Models
source("./R/functions_models.R")
source("./R/function_nlme_validation_plots.R")

Model Fitting

Models: Questions 1,2 and Mass fractions

Select performance and traits variables

data_for_models <-
    data_for_models %>%

        # Select variables for analysis
        dplyr::select(!c(rgr_slope, d15n, above_biomass, below_biomass,
                        rmf, smf, lmf))

\[response\sim treatment*fixer\ + initial\ height + random( 1|\ specie)\]

# Take response variables names
response_vars_q1_q2 <-
    set_names(names(data_for_models)[c(5:14)])

response_vars_q1_q2
     total_biomass                agr                rgr   root_shoot_ratio 
   "total_biomass"              "agr"              "rgr" "root_shoot_ratio" 
              amax                 gs                wue               d13c 
            "amax"               "gs"              "wue"             "d13c" 
              pnue         Narea_g_m2 
            "pnue"       "Narea_g_m2" 
models_q1_q2 <- map(response_vars_q1_q2, ~ mixed_model_1(response = .x,
                                                        data = data_for_models))

names(models_q1_q2)
 [1] "total_biomass"    "agr"              "rgr"              "root_shoot_ratio"
 [5] "amax"             "gs"               "wue"              "d13c"            
 [9] "pnue"             "Narea_g_m2"      
# Log models

## WUE log
model_q2_wue_log <- lmer(log(wue) ~ nfixer*treatment + init_height + (1|spcode),
                         data = data_for_models)
## PNUE log
model_q2_pnue_log <- lmer(log(pnue) ~ nfixer*treatment + init_height + (1|spcode),
                         data = data_for_models)

## Root-Shoot ratio
model_ratio_log <- lmer(log(root_shoot_ratio) ~ nfixer*treatment + init_height + (1|spcode),
                         data = data_for_models)
## Nitrogen log
model_nitrogen_log <- lmer(log(Narea_g_m2) ~ nfixer*treatment + init_height + (1|spcode),
                         data = data_for_models)

## Absolute growth rate
model_agr_log <- lmer(log(agr) ~ nfixer*treatment + init_height + (1|spcode),
                         data = data_for_models)
# Append log models to model list Q1 Q2
log_models <- list(model_q2_wue_log, model_q2_pnue_log,
                   model_ratio_log, model_nitrogen_log,
                   model_agr_log)


names(log_models) <- c("wue_log", "pnue_log", "ratio_log", "nitrogen_log",
                        "agr_log")

models_q1_q2 <- append(log_models, models_q1_q2)

length(models_q1_q2)
[1] 15
names(models_q1_q2)
 [1] "wue_log"          "pnue_log"         "ratio_log"        "nitrogen_log"    
 [5] "agr_log"          "total_biomass"    "agr"              "rgr"             
 [9] "root_shoot_ratio" "amax"             "gs"               "wue"             
[13] "d13c"             "pnue"             "Narea_g_m2"      

Model: Nodule count

# Load data
# This step was done like this because I am working with a subset of the data
# source cleaned data
source("./scripts/code_clean_data_nodules.R")

# Delete unused variables
data_nodules_cleaned <-
    data_nodules_cleaned %>%

        # add id to rownames for keep track of the rows
        column_to_rownames("id") %>%
        dplyr::select(spcode, treatment, everything())

Previuosly I fitted non-log models and found out the log one is the best

log models

## After seeing the validation plots for the lmer_gaussian_log I decided to try
## to improve the log model using the nlme package

nlme_gaussian_log <- lme(log(number_of_root_nodulation) ~ treatment + init_height,
                                    random = ~1|spcode,
                                    data = data_nodules_cleaned)


nlme_nodule_log_weights <- lme(log(number_of_root_nodulation) ~ treatment + init_height,
                                    random = ~1|spcode,
                                    weights = varIdent(form = ~1|spcode),
                                    data = data_nodules_cleaned)

Compare models

# Check if the nlme_gaussian_log_weights is a better model

# Compare between nlme objects
anova(nlme_gaussian_log, nlme_nodule_log_weights)
                        Model df      AIC      BIC    logLik   Test  L.Ratio
nlme_gaussian_log           1  7 84.56385 96.89225 -35.28193                
nlme_nodule_log_weights     2  9 62.79085 78.64165 -22.39542 1 vs 2 25.77301
                        p-value
nlme_gaussian_log              
nlme_nodule_log_weights  <.0001
model.sel(nlme_gaussian_log, nlme_nodule_log_weights)
Model selection table 
                        (Int) int_hgh trt  weights df  logLik AICc delta weight
nlme_nodule_log_weights 3.344 0.03767   + vrI(spc)  9 -22.395 67.5  0.00      1
nlme_gaussian_log       2.943 0.05571   +           7 -35.282 87.4 19.84      0
Abbreviations:
 weights: vrI(spc) = 'varIdent(~1|spcode)'
Models ranked by AICc(x) 
Random terms (all models): 
  1 | spcode
AIC(nlme_gaussian_log, nlme_nodule_log_weights)
                        df      AIC
nlme_gaussian_log        7 84.56385
nlme_nodule_log_weights  9 62.79085
# Model improved, check assumptions for nlme_gaussian_log_weights

Models: Question 3

\[performance\sim treatment:fixer:scaled(trait)\ + initial\ height + random( 1|\ specie)\]

Scale preditors aka traits

data_for_models_scaled <-
    data_for_models %>%

    #  Narea_g_m2 not included since it has a high correlation with amax
    mutate(across(c(4, 9:13), scale))

Model RGR

lme_rgr_3way <- lme(rgr ~   treatment:nfixer:amax +
                            treatment:nfixer:gs  +
                            treatment:nfixer:wue +
                            treatment:nfixer:pnue +
                            treatment:nfixer:d13c +

                            # Added
                            #treatment:nfixer:Narea_g_m2 +
                            init_height ,
                  random = ~1 | spcode,
                  data = data_for_models_scaled)

Model AGR

lme_agr_3way <- lme(agr ~   treatment:nfixer:amax +
                            treatment:nfixer:gs  +
                            treatment:nfixer:wue +
                            treatment:nfixer:pnue +
                            treatment:nfixer:d13c +

                            # Added
                            #treatment:nfixer:Narea_g_m2 +
                            init_height ,
                  random = ~1 | spcode,
                  data = data_for_models_scaled)

Model Total Biomass

lme_total_biom_3way <- lme(total_biomass ~  treatment:nfixer:amax +
                                            treatment:nfixer:gs  +
                                            treatment:nfixer:wue +
                                            treatment:nfixer:pnue +
                                            treatment:nfixer:d13c +

                                            init_height,
                            random = ~1 | spcode,
                            data = data_for_models_scaled)

Model shoot root ratios

lme_3way_root_shoot_ratio <- lme(root_shoot_ratio ~  treatment:nfixer:amax +
                                                        treatment:nfixer:gs  +
                                                        treatment:nfixer:wue +
                                                        treatment:nfixer:pnue +
                                                        treatment:nfixer:d13c +

                                                        # Added
                                                        #treatment:nfixer:Narea_g_m2 +
                                                        init_height,
                            random = ~1 | spcode,
                            data = data_for_models_scaled)

Model Validation

Validation of models for questions 1,2 and Mass fractions

Collinearity

map(models_q1_q2, check_collinearity)
$wue_log
# Check for Multicollinearity

Low Correlation

             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
           nfixer 1.30 [1.13, 1.72]         1.14      0.77     [0.58, 0.89]
        treatment 3.85 [2.94, 5.18]         1.96      0.26     [0.19, 0.34]
      init_height 1.05 [1.00, 3.34]         1.02      0.96     [0.30, 1.00]
 nfixer:treatment 4.57 [3.46, 6.18]         2.14      0.22     [0.16, 0.29]

$pnue_log
# Check for Multicollinearity

Low Correlation

             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
           nfixer 1.20 [1.06, 1.63]         1.09      0.84     [0.61, 0.94]
        treatment 3.85 [2.94, 5.19]         1.96      0.26     [0.19, 0.34]
      init_height 1.05 [1.00, 2.74]         1.03      0.95     [0.36, 1.00]
 nfixer:treatment 4.31 [3.27, 5.82]         2.08      0.23     [0.17, 0.31]

$ratio_log
# Check for Multicollinearity

Low Correlation

             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
           nfixer 1.03 [1.00, 6.70]         1.02      0.97     [0.15, 1.00]
        treatment 3.87 [2.96, 5.22]         1.97      0.26     [0.19, 0.34]
      init_height 1.06 [1.00, 2.12]         1.03      0.94     [0.47, 1.00]
 nfixer:treatment 3.92 [2.99, 5.29]         1.98      0.26     [0.19, 0.33]

$nitrogen_log
# Check for Multicollinearity

Low Correlation

             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
           nfixer 1.09 [1.01, 1.77]         1.04      0.92     [0.56, 0.99]
        treatment 3.87 [2.95, 5.21]         1.97      0.26     [0.19, 0.34]
      init_height 1.06 [1.00, 2.30]         1.03      0.94     [0.43, 1.00]
 nfixer:treatment 4.05 [3.09, 5.47]         2.01      0.25     [0.18, 0.32]

$agr_log
# Check for Multicollinearity

Low Correlation

             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
           nfixer 1.06 [1.00, 2.23]         1.03      0.94     [0.45, 1.00]
        treatment 3.87 [2.95, 5.21]         1.97      0.26     [0.19, 0.34]
      init_height 1.06 [1.00, 2.20]         1.03      0.94     [0.45, 1.00]
 nfixer:treatment 3.98 [3.04, 5.37]         2.00      0.25     [0.19, 0.33]

$total_biomass
# Check for Multicollinearity

Low Correlation

             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
        treatment 3.86 [2.95, 5.20]         1.97      0.26     [0.19, 0.34]
           nfixer 1.12 [1.02, 1.66]         1.06      0.90     [0.60, 0.98]
      init_height 1.06 [1.00, 2.40]         1.03      0.95     [0.42, 1.00]
 treatment:nfixer 4.12 [3.13, 5.56]         2.03      0.24     [0.18, 0.32]

$agr
# Check for Multicollinearity

Low Correlation

             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
        treatment 3.87 [2.95, 5.21]         1.97      0.26     [0.19, 0.34]
           nfixer 1.06 [1.00, 2.12]         1.03      0.94     [0.47, 1.00]
      init_height 1.06 [1.00, 2.22]         1.03      0.94     [0.45, 1.00]
 treatment:nfixer 3.99 [3.04, 5.39]         2.00      0.25     [0.19, 0.33]

$rgr
# Check for Multicollinearity

Low Correlation

             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
        treatment 3.87 [2.96, 5.22]         1.97      0.26     [0.19, 0.34]
           nfixer 1.05 [1.00, 2.86]         1.02      0.95     [0.35, 1.00]
      init_height 1.06 [1.00, 2.17]         1.03      0.94     [0.46, 1.00]
 treatment:nfixer 3.96 [3.02, 5.34]         1.99      0.25     [0.19, 0.33]

$root_shoot_ratio
# Check for Multicollinearity

Low Correlation

             Term  VIF    VIF 95% CI Increased SE Tolerance Tolerance 95% CI
        treatment 3.87 [2.96,  5.22]         1.97      0.26     [0.19, 0.34]
           nfixer 1.03 [1.00, 13.41]         1.01      0.97     [0.07, 1.00]
      init_height 1.07 [1.00,  2.10]         1.03      0.94     [0.48, 1.00]
 treatment:nfixer 3.91 [2.98,  5.27]         1.98      0.26     [0.19, 0.34]

$amax
# Check for Multicollinearity

Low Correlation

             Term  VIF    VIF 95% CI Increased SE Tolerance Tolerance 95% CI
        treatment 3.88 [2.96,  5.22]         1.97      0.26     [0.19, 0.34]
           nfixer 1.03 [1.00, 20.89]         1.01      0.97     [0.05, 1.00]
      init_height 1.07 [1.00,  2.10]         1.03      0.94     [0.48, 1.00]
 treatment:nfixer 3.90 [2.98,  5.26]         1.98      0.26     [0.19, 0.34]

$gs
# Check for Multicollinearity

Low Correlation

        Term  VIF    VIF 95% CI Increased SE Tolerance Tolerance 95% CI
   treatment 3.83 [2.92,  5.15]         1.96      0.26     [0.19, 0.34]
      nfixer 1.92 [1.56,  2.53]         1.39      0.52     [0.40, 0.64]
 init_height 1.03 [1.00, 11.74]         1.01      0.97     [0.09, 1.00]

Moderate Correlation

             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
 treatment:nfixer 6.08 [4.55, 8.28]         2.47      0.16     [0.12, 0.22]

$wue
# Check for Multicollinearity

Low Correlation

             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
        treatment 3.85 [2.94, 5.19]         1.96      0.26     [0.19, 0.34]
           nfixer 1.19 [1.06, 1.62]         1.09      0.84     [0.62, 0.95]
      init_height 1.05 [1.00, 2.71]         1.03      0.95     [0.37, 1.00]
 treatment:nfixer 4.29 [3.26, 5.80]         2.07      0.23     [0.17, 0.31]

$d13c
# Check for Multicollinearity

Low Correlation

             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
        treatment 3.85 [2.94, 5.19]         1.96      0.26     [0.19, 0.34]
           nfixer 1.20 [1.06, 1.63]         1.09      0.84     [0.61, 0.94]
      init_height 1.05 [1.00, 2.74]         1.03      0.95     [0.37, 1.00]
 treatment:nfixer 4.31 [3.27, 5.82]         2.07      0.23     [0.17, 0.31]

$pnue
# Check for Multicollinearity

Low Correlation

             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
        treatment 3.85 [2.94, 5.18]         1.96      0.26     [0.19, 0.34]
           nfixer 1.30 [1.12, 1.71]         1.14      0.77     [0.58, 0.89]
      init_height 1.05 [1.00, 3.28]         1.02      0.96     [0.30, 1.00]
 treatment:nfixer 4.55 [3.44, 6.15]         2.13      0.22     [0.16, 0.29]

$Narea_g_m2
# Check for Multicollinearity

Low Correlation

             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
        treatment 3.86 [2.95, 5.21]         1.97      0.26     [0.19, 0.34]
           nfixer 1.10 [1.01, 1.70]         1.05      0.91     [0.59, 0.99]
      init_height 1.06 [1.00, 2.35]         1.03      0.95     [0.43, 1.00]
 treatment:nfixer 4.08 [3.11, 5.51]         2.02      0.24     [0.18, 0.32]
# No problems found here
map(models_q1_q2, check_model)
$wue_log


$pnue_log


$ratio_log


$nitrogen_log


$agr_log


$total_biomass


$agr


$rgr


$root_shoot_ratio


$amax


$gs


$wue


$d13c


$pnue


$Narea_g_m2

Validation of nodule count models

Checking log nodule count model’s assumptions

Zuur et al pp 84:

“Note that these residuals still show heterogeneity, but this is now allowed (because the residual variation differs depending on the chosen variance structure and values of the variance covariate). Hence, these residuals are less useful for the model validation process.”

nlme_validation_plots(nlme_nodule_log_weights, data = data_nodules_cleaned,
                      group = "spcode", variables = c("treatment") )

Validation of models for question 3

RGR

nlme_validation_plots(lme_rgr_3way, data = data_for_models_scaled, group = "spcode")

[1] "No variable specified inthe variables argument"

AGR

nlme_validation_plots(lme_agr_3way, data = data_for_models_scaled, group = "spcode")

[1] "No variable specified inthe variables argument"

Total biomass

nlme_validation_plots(lme_total_biom_3way, data = data_for_models_scaled,
                      group = "spcode")

[1] "No variable specified inthe variables argument"

Root-shoot ratio

nlme_validation_plots(lme_3way_root_shoot_ratio, data = data_for_models_scaled,
                      group = "spcode")

[1] "No variable specified inthe variables argument"

Save model lists as .RData

Models for Q1, Q2 and Mass fractions

models_q1_q2 <-

    models_q1_q2 %>%

    # Remove models that showed violation of the assumptions
    purrr::list_modify("pnue" = NULL, "wue" = NULL,
                       "Narea_g_m2" = NULL, "root_shoot_ratio" = NULL)

Nodule count

models_nodule_count <- list(nlme_gaussian_log, nlme_nodule_log_weights)
names(models_nodule_count) <- c("nlme_gaussian_log", "nlme_gaussian_log_weights")

3-way models

models_3way_q3 <- list(lme_rgr_3way, lme_total_biom_3way,
                            lme_3way_root_shoot_ratio)

names(models_3way_q3) <- c("lme_rgr_3way", "lme_total_biom_3way",
                                "lme_3way_root_shoot_ratio")
saveRDS(models_q1_q2, file = "./processed_data/models_q1_q2.RData")
saveRDS(models_nodule_count, file = "./processed_data/models_nodule_count.RData")
saveRDS(models_3way_q3, file = "./processed_data/models_3way_q3.RData")